Chapter 5 Community composition

# Load the saved R objects for this chapter from disk.
# NOTE(review): presumably this provides the objects used below that are never
# created in this chapter (genome_counts_filt, genome_metadata, sample_metadata,
# genome_tree, genome_gifts, phylum_colors) - confirm against the .Rdata file.
load("resources/data.Rdata")

5.1 Taxonomy overview

5.1.1 Stacked barplot

# Stacked barplot of phylum-level relative abundances: one bar per sample,
# one facet per individual.
genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation (every sample column is divided by its total)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # one row per genome-sample pair
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  filter(count > 0) %>% # drop genomes that are absent from a sample
  # The Sg1..Sg10 labels are individual IDs (they are used as such in the NMDS
  # plots further down), so they order the facets. Recoding `sample` with these
  # ten levels would turn every non-matching sample label into NA.
  mutate(individual = factor(individual, levels = c("Sg1", "Sg2", "Sg3", "Sg4", "Sg5", "Sg6", "Sg7", "Sg8", "Sg9", "Sg10"))) %>%
  ggplot(aes(x = sample, y = count, fill = phylum, group = phylum)) + # grouping keeps the same stacking order of taxa in every bar
    geom_bar(stat = "identity", colour = "white", linewidth = 0.1) + # stacked bars with thin white borders
    scale_fill_manual(values = phylum_colors) +
    facet_nested(. ~ individual, scales = "free") + # one facet per individual
    guides(fill = guide_legend(ncol = 1)) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1),
          axis.title.x = element_blank(),
          panel.background = element_blank(),
          panel.border = element_blank(),
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black")) +
    labs(fill = "Phylum", y = "Relative abundance", x = "Samples")

5.1.2 Phylum relative abundances

# Relative abundance of each phylum in each sample.
# The result has one row per sample-phylum pair and is returned ungrouped.
phylum_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation (every sample column is divided by its total)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  left_join(genome_metadata, by = join_by(genome)) %>%
  group_by(sample, phylum) %>%
  summarise(relabun = sum(count), .groups = "drop") # explicit .groups: no leftover grouping by sample
`summarise()` has grouped output by 'sample'. You can override using the `.groups` argument.
# Phylum names ordered from the highest to the lowest mean relative abundance
phylum_arrange <- phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun)) %>%
  arrange(desc(mean)) %>%
  pull(phylum)

# Mean relative abundance of each phylum across samples
phylum_summary %>%
  group_by(phylum) %>%
  summarise(mean = mean(relabun))
# A tibble: 10 × 2
   phylum                   mean
   <chr>                   <dbl>
 1 p__Bacillota         0.0189  
 2 p__Bacillota_A       0.236   
 3 p__Bacillota_B       0.00355 
 4 p__Bacillota_C       0.00260 
 5 p__Bacteroidota      0.218   
 6 p__Campylobacterota  0.0995  
 7 p__Cyanobacteriota   0.000493
 8 p__Desulfobacterota  0.0116  
 9 p__Pseudomonadota    0.404   
10 p__Verrucomicrobiota 0.00600 
# Per-sample relative abundances as jittered points, one row per phylum;
# rows are ordered by phylum_arrange (first entry at the top).
phylum_summary %>%
  filter(phylum %in% phylum_arrange) %>%
  mutate(phylum = factor(phylum, levels = rev(phylum_arrange))) %>%
  ggplot(aes(x = relabun, y = phylum, group = phylum, color = phylum)) +
  # scale_color_manual(values=c("#F4D9AE","#DE9E46","#2D8077","#CD4F41")) +
  # geom_boxplot() +
  geom_jitter(alpha = 0.5) +
  theme_minimal()

## Taxonomy boxplot

5.1.3 Family

# Relative abundance of each family in each sample.
# The result has one row per sample-family pair and is returned ungrouped.
family_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation (every sample column is divided by its total)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # one row per genome-sample pair
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, family) %>%
  summarise(relabun = sum(count), .groups = "drop") # explicit .groups: no leftover grouping by sample
`summarise()` has grouped output by 'sample'. You can override using the `.groups` argument.
# Family names ordered from the highest to the lowest summed relative abundance
family_arrange <- family_summary %>%
  group_by(family) %>%
  summarise(total = sum(relabun)) %>%
  arrange(desc(total)) %>%
  pull(family)

# Jitter plot of per-sample relative abundances for the first 20 families of
# family_arrange, coloured by phylum and faceted by sample type.
# NOTE(review): the family -> phylum lookup built below is not unique per family
# (this chunk emits a many-to-many join warning), so rows of an affected family
# are duplicated once per matching phylum. Presumably the culprit is the
# unclassified family label shared by several phyla - confirm, then exclude it
# or carry phylum through family_summary instead of re-joining it here.
# NOTE(review): phylum_colors[-8] removes a palette entry by position; this looks
# tied to the exact set of phyla that ends up in the plot - confirm before
# changing the join or the filters.
family_summary %>%
    left_join(genome_metadata %>% select(family,phylum) %>% unique(),by=join_by(family==family)) %>%
    left_join(sample_metadata,by=join_by(sample==sample)) %>%
    filter(family %in% family_arrange[1:20]) %>%
    mutate(family=factor(family,levels=rev(family_arrange[1:20]))) %>%
    filter(relabun > 0) %>%
    ggplot(aes(x=relabun, y=family, group=family, color=phylum)) +
        scale_color_manual(values=phylum_colors[-8]) +
        #geom_boxplot() +
        geom_jitter(alpha=0.5) + 
        facet_grid(.~type)+
        theme_minimal()
Warning in left_join(., genome_metadata %>% select(family, phylum) %>% unique(), : Detected an unexpected many-to-many relationship between `x` and `y`.
ℹ Row 1 of `x` matches multiple rows in `y`.
ℹ Row 7 of `y` matches multiple rows in `x`.
ℹ If a many-to-many relationship is expected, set `relationship = "many-to-many"` to silence this warning.

5.1.4 Genus

# Relative abundance of each genus in each sample, excluding the "g__" label.
# The result has one row per sample-genus pair and is returned ungrouped.
genus_summary <- genome_counts_filt %>%
  mutate(across(-genome, ~ .x / sum(.x))) %>% # apply TSS normalisation (every sample column is divided by its total)
  pivot_longer(-genome, names_to = "sample", values_to = "count") %>% # one row per genome-sample pair
  left_join(sample_metadata, by = join_by(sample)) %>% # append sample metadata
  left_join(genome_metadata, by = join_by(genome)) %>% # append genome metadata
  group_by(sample, genus) %>%
  summarise(relabun = sum(count), .groups = "drop") %>% # explicit .groups: no leftover grouping by sample
  filter(genus != "g__")
`summarise()` has grouped output by 'sample'. You can override using the `.groups` argument.
# Genus names (without the "g__" prefix) ordered from the highest to the lowest
# summed relative abundance
genus_arrange <- genus_summary %>%
  group_by(genus) %>%
  summarise(total = sum(relabun)) %>%
  filter(genus != "g__") %>%
  arrange(desc(total)) %>%
  mutate(genus = sub("^g__", "", genus)) %>%
  pull(genus)

# Jitter plot of per-sample relative abundances for the first 20 genera of
# genus_arrange, coloured by phylum and faceted by sample type.
genus_summary %>%
  left_join(distinct(genome_metadata, genus, phylum), by = join_by(genus)) %>% # attach the phylum of each genus
  left_join(sample_metadata, by = join_by(sample)) %>% # attach sample metadata (provides `type`)
  mutate(genus = sub("^g__", "", genus)) %>% # strip the prefix so names match genus_arrange
  filter(genus %in% genus_arrange[1:20]) %>%
  mutate(genus = factor(genus, levels = rev(genus_arrange[1:20]))) %>%
  filter(relabun > 0) %>%
  ggplot(aes(x = relabun, y = genus, group = genus, color = phylum)) +
  scale_color_manual(values = phylum_colors[-c(3, 4, 6, 8)]) +
  # geom_boxplot() +
  geom_jitter(alpha = 0.5) +
  facet_grid(. ~ type) +
  theme_minimal()

5.2 Alpha diversity

# Calculate Hill numbers

# Compute one alpha diversity metric per sample.
#   counts: genome-by-sample count table with a `genome` column.
#   metric: name given to the value column of the result.
#   ...:    arguments forwarded to hilldiv() (q, and optionally tree or dist).
# Returns a data frame with the columns `sample` and `<metric>`.
hill_alpha <- function(counts, metric, ...) {
  abundance <- counts %>%
    column_to_rownames(var = "genome") %>%
    dplyr::select(where(~ !all(. == 0))) # drop sample columns that contain only zeros

  hilldiv(abundance, ...) %>%
    t() %>% # samples become rows
    as.data.frame() %>%
    dplyr::rename(!!metric := 1) %>% # name the first column after the metric
    rownames_to_column(var = "sample")
}

richness <- hill_alpha(genome_counts_filt, "richness", q = 0)

neutral <- hill_alpha(genome_counts_filt, "neutral", q = 1)

phylogenetic <- hill_alpha(genome_counts_filt, "phylogenetic", q = 1, tree = genome_tree)

# Aggregate basal GIFT into elements
dist <- genome_gifts %>%
  to.elements(., GIFT_db) %>%
  traits2dist(., method = "gower")

functional <- hill_alpha(genome_counts_filt, "functional", q = 1, dist = dist) %>%
  mutate(functional = if_else(is.nan(functional), 1, functional)) # NaN results are replaced by 1

# Merge all metrics
alpha_div <- richness %>%
  full_join(neutral, by = join_by(sample)) %>%
  full_join(phylogenetic, by = join_by(sample)) %>%
  full_join(functional, by = join_by(sample))
# Boxplots with overlaid points of every alpha diversity metric by sample type,
# one free-scaled panel per metric.
alpha_div %>%
  pivot_longer(-sample, names_to = "metric", values_to = "value") %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  mutate(metric = factor(metric, levels = c("richness", "neutral", "phylogenetic", "functional"))) %>%
      ggplot(aes(y = value, x = type, group = type, color = type, fill = type)) +
      geom_boxplot(outlier.shape = NA) + # outliers are already shown by the jittered points
      geom_jitter(alpha = 0.5, height = 0) + # jitter horizontally only so plotted values are not perturbed
      scale_color_manual(name = "Sample type",
          breaks = c("cloaca", "feces"),
          labels = c("Cloaca", "Faeces"),
          values = c("#e5bd5b", "#6b7398")) +
      scale_fill_manual(name = "Sample type",
          breaks = c("cloaca", "feces"),
          labels = c("Cloaca", "Faeces"),
          values = c("#e5bd5b50", "#6b739850")) +
      facet_wrap(. ~ metric, scales = "free", ncol = 4) +
      coord_cartesian(xlim = c(1, NA)) +
      theme_classic() +
      theme(
        strip.background = element_blank(),
        # `linewidth` replaces the deprecated `size` argument of element_line()
        panel.grid.minor.x = element_line(linewidth = .1, color = "grey"),
        axis.title.x = element_blank(),
        axis.title.y = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)
      )

# Mixed model of richness by sample type with a random intercept per
# individual (fitted with REML = FALSE), shown as a tidy table.
alpha_div %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  lmerTest::lmer(richness ~ type + (1 | individual), data = ., REML = FALSE) %>%
  broom.mixed::tidy() %>%
  tt()
boundary (singular) fit: see help('isSingular')
tinytable_2uood9ojljet2pld4e59
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 1.600000 2.987725 0.5355245 20 5.981925e-01
fixed NA typefeces 61.300000 4.225281 14.5079106 20 4.444567e-12
ran_pars individual sd__(Intercept) 0.000000 NA NA NA NA
ran_pars Residual sd__Observation 9.448016 NA NA NA NA
# Mixed model of neutral diversity by sample type with a random intercept per
# individual (fitted with REML = FALSE), shown as a tidy table.
alpha_div %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  lmerTest::lmer(neutral ~ type + (1 | individual), data = ., REML = FALSE) %>%
  broom.mixed::tidy() %>%
  tt()
boundary (singular) fit: see help('isSingular')
tinytable_mmkel0q5vih3woebkamu
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 1.300518e+00 1.359874 0.9563518 20 3.503127e-01
fixed NA typefeces 3.294989e+01 1.923152 17.1332730 20 2.024047e-13
ran_pars individual sd__(Intercept) 4.971903e-09 NA NA NA NA
ran_pars Residual sd__Observation 4.300298e+00 NA NA NA NA
# Mixed model of phylogenetic diversity by sample type with a random intercept
# per individual (fitted with REML = FALSE), shown as a tidy table.
alpha_div %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  lmerTest::lmer(phylogenetic ~ type + (1 | individual), data = ., REML = FALSE) %>%
  broom.mixed::tidy() %>%
  tt()
boundary (singular) fit: see help('isSingular')
tinytable_k90grciz455pv58j14tn
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 1.2384408 0.2335543 5.302583 20 3.441968e-05
fixed NA typefeces 3.1026737 0.3302956 9.393626 20 8.961863e-09
ran_pars individual sd__(Intercept) 0.0000000 NA NA NA NA
ran_pars Residual sd__Observation 0.7385635 NA NA NA NA
# Mixed model of functional diversity by sample type with a random intercept
# per individual (fitted with REML = FALSE), shown as a tidy table.
alpha_div %>%
  left_join(sample_metadata, by = join_by(sample)) %>%
  lmerTest::lmer(functional ~ type + (1 | individual), data = ., REML = FALSE) %>%
  broom.mixed::tidy() %>%
  tt()
boundary (singular) fit: see help('isSingular')
tinytable_hrpni5x5gkon8fmzy8zy
effect group term estimate std.error statistic df p.value
fixed NA (Intercept) 1.052834e+00 0.03546038 29.690432 20 5.139978e-18
fixed NA typefeces 4.136317e-01 0.05014855 8.248128 20 7.248330e-08
ran_pars individual sd__(Intercept) 3.057788e-11 NA NA NA NA
ran_pars Residual sd__Observation 1.121356e-01 NA NA NA NA

5.3 Beta diversity

# Pairwise hillpair() results between samples for the four diversity flavours;
# the `$C` and `$S` components of these objects are used further down.

# q = 0
beta_q0n <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  hillpair(q = 0)

# q = 1
beta_q1n <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  hillpair(q = 1)

# q = 1, using the genome tree
beta_q1p <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  hillpair(q = 1, tree = genome_tree)

# q = 1, using the trait distance object `dist`
beta_q1f <- genome_counts_filt %>%
  column_to_rownames(var = "genome") %>%
  hillpair(q = 1, dist = dist)
#Richness
# Homogeneity of dispersion between sample types. The grouping vector is looked
# up by sample label so that it follows the order of the distance object rather
# than the row order of sample_metadata.
betadisper(
  beta_q0n$C,
  sample_metadata$type[match(labels(beta_q0n$C), sample_metadata$sample)]
) %>%
  permutest(pairwise = TRUE)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     1 0.07339 0.073391 0.6442    999  0.413
Residuals 18 2.05080 0.113933                     

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
        cloaca feces
cloaca         0.433
feces  0.43268      
# PERMANOVA of sample type on the q0n matrix, with permutations restricted
# within individuals. The metadata is ordered by the labels of the matrix that
# is actually tested (beta_q0n$C).
adonis2(beta_q0n$C ~ type,
        data = sample_metadata %>% arrange(match(sample, labels(beta_q0n$C))),
        permutations = 999,
        strata = sample_metadata %>% arrange(match(sample, labels(beta_q0n$C))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
tinytable_jzyggd8l9iv3bqeh0r03
term df SumOfSqs R2 statistic p.value
type 1 3.005780 0.4664694 15.73752 0.002
Residual 18 3.437902 0.5335306 NA NA
Total 19 6.443682 1.0000000 NA NA
#Neutral diversity
# Homogeneity of dispersion between sample types. The grouping vector is looked
# up by sample label so that it follows the order of the distance object rather
# than the row order of sample_metadata.
betadisper(
  beta_q1n$C,
  sample_metadata$type[match(labels(beta_q1n$C), sample_metadata$sample)]
) %>%
  permutest(pairwise = TRUE)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq Mean Sq      F N.Perm Pr(>F)
Groups     1 0.01774 0.01774 0.1446    999  0.692
Residuals 18 2.20802 0.12267                     

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
        cloaca feces
cloaca         0.694
feces  0.70818      
# PERMANOVA of sample type on the q1n matrix, with permutations restricted
# within individuals; the metadata is ordered by the labels of the matrix.
adonis2(
  beta_q1n$C ~ type,
  data = sample_metadata %>%
    arrange(match(sample, labels(beta_q1n$C))),
  permutations = 999,
  strata = sample_metadata %>%
    arrange(match(sample, labels(beta_q1n$C))) %>%
    pull(individual)
) %>%
  broom::tidy() %>%
  tt()
tinytable_05e5en5ovgpr9bzns91o
term df SumOfSqs R2 statistic p.value
type 1 2.322537 0.362287 10.22586 0.002
Residual 18 4.088228 0.637713 NA NA
Total 19 6.410765 1.000000 NA NA
#Phylogenetic diversity
# Homogeneity of dispersion between sample types. The grouping vector is looked
# up by sample label so that it follows the order of the distance object rather
# than the row order of sample_metadata.
betadisper(
  beta_q1p$C,
  sample_metadata$type[match(labels(beta_q1p$C), sample_metadata$sample)]
) %>%
  permutest(pairwise = TRUE)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)
Groups     1 0.07195 0.071945 1.0792    999   0.35
Residuals 18 1.20001 0.066667                     

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
        cloaca feces
cloaca         0.443
feces  0.31264      
# PERMANOVA of sample type on the q1p matrix, with permutations restricted
# within individuals. The metadata is ordered by the labels of the matrix that
# is actually tested (beta_q1p$C).
adonis2(beta_q1p$C ~ type,
        data = sample_metadata %>% arrange(match(sample, labels(beta_q1p$C))),
        permutations = 999,
        strata = sample_metadata %>% arrange(match(sample, labels(beta_q1p$C))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
tinytable_mdety9hrdpf02w2wwlaq
term df SumOfSqs R2 statistic p.value
type 1 2.893508 0.6789794 38.07116 0.002
Residual 18 1.368047 0.3210206 NA NA
Total 19 4.261555 1.0000000 NA NA
#Functional diversity
# Homogeneity of dispersion between sample types. The grouping vector is looked
# up by sample label so that it follows the order of the distance object rather
# than the row order of sample_metadata.
betadisper(
  beta_q1f$C,
  sample_metadata$type[match(labels(beta_q1f$C), sample_metadata$sample)]
) %>%
  permutest(pairwise = TRUE)

Permutation test for homogeneity of multivariate dispersions
Permutation: free
Number of permutations: 999

Response: Distances
          Df  Sum Sq  Mean Sq      F N.Perm Pr(>F)   
Groups     1 0.15953 0.159526 7.2962    999   0.01 **
Residuals 18 0.39355 0.021864                        
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Pairwise comparisons:
(Observed p-value below diagonal, permuted p-value above diagonal)
         cloaca feces
cloaca          0.006
feces  0.014616      
# PERMANOVA of sample type on the q1f matrix, with permutations restricted
# within individuals. The metadata is ordered by the labels of the matrix that
# is actually tested (beta_q1f$C).
adonis2(beta_q1f$C ~ type,
        data = sample_metadata %>% arrange(match(sample, labels(beta_q1f$C))),
        permutations = 999,
        strata = sample_metadata %>% arrange(match(sample, labels(beta_q1f$C))) %>% pull(individual)) %>%
        broom::tidy() %>%
        tt()
tinytable_u218fyoe7gla6dfy8a97
term df SumOfSqs R2 statistic p.value
type 1 3.1347963 0.8009941 72.44958 0.002
Residual 18 0.7788359 0.1990059 NA NA
Total 19 3.9136321 1.0000000 NA NA

5.3.1 Neutral diversity plot

# NMDS ordination of the neutral (q = 1) matrix: points coloured by sample type,
# shaped by individual, and linked to the centroid of their sample type.
# The q1n `$C` matrix is ordinated so that the plot matches its heading and the
# matrix tested in the neutral diversity betadisper/adonis2 chunk.
beta_q1n$C %>%
  vegan::metaMDS(., trymax = 500, k = 2, verbosity = FALSE, trace = FALSE) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample)) %>%
  group_by(type) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>% # centroid of each sample type
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(individual = factor(individual, levels = c("Sg1", "Sg2", "Sg3", "Sg4", "Sg5", "Sg6", "Sg7", "Sg8", "Sg9", "Sg10"))) %>%
  ggplot(aes(x = NMDS1, y = NMDS2, color = type, shape = individual)) +
    scale_color_manual(name = "Sample type",
          breaks = c("cloaca", "feces"),
          labels = c("Cloaca", "Faeces"),
          values = c("#e5bd5b", "#6b7398")) +
    scale_shape_manual(values = 1:10) +
    geom_point(size = 4) +
    #   stat_ellipse(aes(color = beta_q1n_nmds$Groups))+
    geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) + # spokes from centroid to sample
    theme_classic() +
    theme(
      axis.text.x = element_text(size = 12),
      axis.text.y = element_text(size = 12),
      axis.title = element_text(size = 20, face = "bold"),
      axis.text = element_text(face = "bold", size = 18),
      panel.background = element_blank(),
      # `linewidth` replaces the deprecated `size` argument of element_line()
      axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
      legend.text = element_text(size = 16),
      legend.title = element_text(size = 18),
      legend.position = "right", legend.box = "vertical"
    ) +
    labs(shape = "Individual")

5.3.2 Functional diversity plot

# NMDS ordination of the functional (q1f) matrix: points coloured by sample
# type, shaped by individual, and linked to the centroid of their sample type.
beta_q1f$C %>%
  vegan::metaMDS(., trymax = 500, k = 2, verbosity = FALSE, trace = FALSE) %>%
  vegan::scores() %>%
  as_tibble(., rownames = "sample") %>%
  dplyr::left_join(sample_metadata, by = join_by(sample)) %>%
  group_by(type) %>%
  mutate(x_cen = mean(NMDS1, na.rm = TRUE)) %>% # centroid of each sample type
  mutate(y_cen = mean(NMDS2, na.rm = TRUE)) %>%
  ungroup() %>%
  mutate(individual = factor(individual, levels = c("Sg1", "Sg2", "Sg3", "Sg4", "Sg5", "Sg6", "Sg7", "Sg8", "Sg9", "Sg10"))) %>%
  # `shape = individual` is mapped so that the individual factor and the manual
  # shape scale below take effect, as in the neutral diversity plot.
  ggplot(aes(x = NMDS1, y = NMDS2, color = type, shape = individual)) +
    scale_color_manual(name = "Sample type",
          breaks = c("cloaca", "feces"),
          labels = c("Cloaca", "Faeces"),
          values = c("#e5bd5b", "#6b7398")) +
    scale_shape_manual(values = 1:10) +
    geom_point(size = 4) +
    #   stat_ellipse(aes(color = beta_q1n_nmds$Groups))+
    geom_segment(aes(x = x_cen, y = y_cen, xend = NMDS1, yend = NMDS2), alpha = 0.9) + # spokes from centroid to sample
    theme_classic() +
    theme(
      axis.text.x = element_text(size = 12),
      axis.text.y = element_text(size = 12),
      axis.title = element_text(size = 20, face = "bold"),
      axis.text = element_text(face = "bold", size = 18),
      panel.background = element_blank(),
      # `linewidth` replaces the deprecated `size` argument of element_line()
      axis.line = element_line(linewidth = 0.5, linetype = "solid", colour = "black"),
      legend.text = element_text(size = 16),
      legend.title = element_text(size = 18),
      legend.position = "right", legend.box = "vertical"
    ) +
    labs(shape = "Individual")